import requests
import time, datetime
import os, sys
import multiprocessing as mp
from bs4 import BeautifulSoup

# Access key sent as the accessKey query parameter of the citation request URLs
# built by the downloader functions below.
# NOTE(review): this credential is hard-coded in source - consider loading it
# from the environment or a config file that is not committed.
ServiceKey = 'wUhbq/9OQDrudjkNWRXEtRsi831Z3MTe89qOGbvQi8M='
# NOTE(review): not referenced by any code visible in this file; its purpose is
# unconfirmed (209999 and 214999 each appear twice).
lastlist = [170100, 179999, 185416, 204999, 209999, 214999, 209999, 214999, 219999]


class BlockedUserError(Exception):
	"""Signals that the service reported the access key as blocked.

	The exception takes no arguments; its message is always the fixed text
	stored in ``_MESSAGE``.
	"""

	_MESSAGE = 'Service key is blocked'

	def __init__(self):
		Exception.__init__(self, self._MESSAGE)


def check_to_skip(appnumber, year):
	"""Report whether the citation XML for an application is already on disk.

	Args:
		appnumber: Application number used as the file name (without ".xml").
		year: Name of the per-year sub-directory under D:\\KIPRIS\\Citation.

	Returns:
		1 if the file exists, 0 otherwise.
	"""
	path = 'D:\\KIPRIS\\Citation\\{}\\{}.xml'.format(year, appnumber)
	# isfile() only stats the path, so no file handle is opened and left
	# unclosed for every application number that gets skipped.
	return 1 if os.path.isfile(path) else 0


# Settings shared by the two downloader entry points below.
_CITATION_URL = "http://plus.kipris.or.kr/openapi/rest/CitationService/citationInfoV3?applicationNumber={}&accessKey={}"
_CITATION_PATH = 'D:\\KIPRIS\\Citation\\{}\\{}.xml'
_REQUEST_TIMEOUT = 60     # seconds, passed to requests.get
_TIMEOUT_PAUSE = 60       # seconds to sleep after a timed-out request
_RETRY_PAUSE = 1          # seconds to sleep after any other failed request
_MIN_REQUEST_TIME = 0.22  # every request is padded to last at least this long


def _fetch_with_retry(url, appnumber):
	"""Return the response for url, retrying until a request succeeds.

	Only Exception subclasses are retried, so KeyboardInterrupt and
	SystemExit can still stop the otherwise endless loop.
	"""
	while True:
		try:
			return requests.get(url, timeout=_REQUEST_TIMEOUT)
		except requests.exceptions.Timeout:
			print("Request {} tmied out - going to sleep for 60 secs - {}".format(appnumber, datetime.datetime.now()))
			time.sleep(_TIMEOUT_PAUSE)
		except requests.exceptions.ConnectionError:
			print("ConnectionError for requst {} - retry".format(appnumber))
			# Brief pause so a dead connection is not retried in a tight loop.
			time.sleep(_RETRY_PAUSE)
		except Exception:
			print("Unknown Error for request {} - retry".format(appnumber))
			time.sleep(_RETRY_PAUSE)


def _exit_unless_usable(data, appnumber):
	"""Terminate the process if the response is blocked or has no resultMsg."""
	result_msg = BeautifulSoup(data, 'xml').find("resultMsg")
	if result_msg is None:
		# Such a response is not saved: a saved file would make
		# check_to_skip() treat this number as already downloaded.
		reason = "No resultMsg element in the response for {}".format(appnumber)
	elif result_msg.text == "Blocked users.":
		reason = str(BlockedUserError())
	else:
		return
	print(reason)
	print(datetime.datetime.now())
	# NOTE(review): inside a multiprocessing.Pool worker, sys.exit() only ends
	# that worker process - confirm the parent's pool.map reacts as intended.
	sys.exit()


def _download_citation(appnumber, year):
	"""Fetch the citation XML for appnumber and store it in the year folder."""
	if check_to_skip(appnumber, year) == 1:
		print("{} is already downloaded.".format(appnumber))
		return
	url = _CITATION_URL.format(appnumber, ServiceKey)
	start = time.time()
	response = _fetch_with_retry(url, appnumber)
	elapsed = time.time() - start
	# Pad fast requests so consecutive calls are spaced out.
	if elapsed < _MIN_REQUEST_TIME:
		time.sleep(_MIN_REQUEST_TIME - elapsed)
	data = response.text
	_exit_unless_usable(data, appnumber)
	path = _CITATION_PATH.format(year, appnumber)
	with open(path, 'w', encoding="utf-8") as f:
		f.write(data)
	size = round(os.path.getsize(path) / 1024.0, 1)
	print("{} - {} - {}sec - {}KB".format(appnumber, response.status_code, round(elapsed, 3), size))


def downloader(ind, year):
	"""Download the citation XML for one application number.

	The application number is "10" + year + "7" + ind left-padded with zeros
	to six characters. Numbers whose file already exists are skipped.

	Args:
		ind: Serial part of the application number.
		year: Year part of the application number; also the output folder name.

	Side effects:
		Prints progress, writes D:\\KIPRIS\\Citation\\<year>\\<appnumber>.xml and
		calls sys.exit() when the service reports "Blocked users." or the
		response has no resultMsg element.
	"""
	_download_citation("10{}7{:0>6}".format(year, ind), year)


def downloader_pctold(ind, year):
	"""Download the citation XML for one application number (older format).

	Same as downloader(), except that the application number is
	"10" + year + "07" + ind left-padded with zeros to five characters
	(presumably the older PCT numbering, going by the function name).

	Args:
		ind: Serial part of the application number.
		year: Year part of the application number; also the output folder name.
	"""
	_download_citation("10{}07{:0>5}".format(year, ind), year)

def by_year_old(year):
	"""Download serial numbers 1..20000 of one year via downloader_pctold().

	Args:
		year: Year to process; also the name of the output folder created
			under D:\\KIPRIS\\Citation.

	Raises:
		OSError: If the output folder cannot be created.
	"""
	# exist_ok covers the "folder already exists" case the old blanket
	# except was there for, while real failures (bad drive, permissions)
	# surface here instead of after the first network request.
	os.makedirs('D:\\KIPRIS\\Citation\\{}'.format(year), exist_ok=True)
	last = 20000
	for ind in range(1, last + 1):
		downloader_pctold(ind, year)

def by_year(year):
	"""Download serial numbers 1..50000 of one year via downloader().

	Args:
		year: Year to process; also the name of the output folder created
			under D:\\KIPRIS\\Citation.

	Raises:
		OSError: If the output folder cannot be created.
	"""
	# exist_ok covers the "folder already exists" case; other failures
	# (bad drive, permissions) are reported instead of being swallowed.
	os.makedirs('D:\\KIPRIS\\Citation\\{}'.format(year), exist_ok=True)
	last = 50000
	for ind in range(1, last + 1):
		# downloader() is the function in this file that builds the
		# "7" + six-digit application numbers; calling an undefined name
		# here would raise NameError on the first iteration.
		downloader(ind, year)

if __name__ == '__main__':
	# Nine worker processes handle the years in two consecutive batches.
	# The with-block shuts the workers down once both batches are done
	# (or if one of them raises) instead of leaving the pool open.
	with mp.Pool(9) as pool:
		pool.map(by_year_old, range(1984, 1999))
		pool.map(by_year, range(1999, 2018))